
##########################
# read coded killings keep only well-coded killings
full_set <- readRDS("temp/geocoded_shootings.rds") %>% 
  ungroup() %>% 
  mutate(score = ifelse(is.na(score), 100, as.numeric(score))) %>% 
  filter(score > 95)

## create function to find closest killing on any given day
find_closest <- function(bg_data_f, centroids_f, d){
  d = as.Date(d)
  
  ## keep killings occuring on that day
  sites <- filter(full_set,
                  date == d) %>%
    mutate(id = row_number())
  
  if(nrow(sites) > 0){ #if there were any killings on that day, do the following:
    ## create tree of those killings
    tree <- createTree(coordinates(select(sites, x = longitude, y = latitude)))
    
    ## find closest killing to each point in centroids_f
    inds <- knnLookup(tree , newdat = coordinates(centroids_f), k = 1)
    
    ## combine killing info and BG info
    bg_data_f <- left_join(cbind(bg_data_f, inds),
                           select(sites, id, longitude, latitude, date, id2),
                           by = c("inds" = "id"))
    
    ## calculate distance between BG and killing, keep if within 20 miles
    dist <- data.table(dist = pointDistance(select(bg_data_f, INTPTLON, INTPTLAT),
                                            select(bg_data_f, longitude, latitude), lonlat = T) * 0.000621371,
                       date = bg_data_f$date,
                       id = bg_data_f$id2,
                       GEOID = bg_data_f$GEOID) %>% 
      filter(dist <= 20)
  }else{
    dist <- data.table(dist = double(),
                       date = as.Date(character()),
                       id = integer(),
                       GEOID = character())
  }
  
  return(dist)
}

## this will keep one observation for every block group for every shooting within 20 miles
## over the 2 year-long periods
## loop over every state
for(s in unique(filter(fips_codes, state_code <= 56)$state_code)){
  print(s)
  if(!(file.exists(paste0("temp/bgs_dists_plac_new_", s, ".rds")))){
    
    ## pull BG shapefiles using tigris package
    bgs <- block_groups(state = s, class = "sp", year = 2019)
    
    centroids <- SpatialPoints(
      data.table(x = as.numeric(bgs@data$INTPTLON), y = as.numeric(bgs@data$INTPTLAT))
    )
    
    
    bg_data <- bgs@data %>%
      mutate_at(vars(INTPTLON, INTPTLAT), as.numeric)
    
    #########################################
    ## loop over every day between Jan 1, 2020, and Election day
    tot <- rbindlist(lapply(seq(as.Date("2019-05-03"), as.Date("2020-05-02"), by="days"), function(d){
      l <- find_closest(bg_data, centroids, d)
    }))
    
    tot2 <- rbindlist(lapply(seq(as.Date("2015-05-08"), as.Date("2016-05-07"), by="days"), function(d){
      l <- find_closest(bg_data, centroids, d)
    }))
    
    tot <- bind_rows(tot, tot2)
    
    ## for each state, save a table with 1 observation for each BG for each day with closest killing < 0.5
    saveRDS(tot, paste0("temp/bgs_dists_plac_new_", s, ".rds"))
  }
}

files <- list.files(path = "temp/", pattern = "^bgs_dists_plac_new_*", full.names = T)

all_bgs <- rbindlist(lapply(files, readRDS))

saveRDS(all_bgs, "temp/dists_long_new_plac.rds")
